In [2]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker 
import plotly.express as px
import plotly.graph_objs as go
In [3]:
# Load the video game sales dataset from a path relative to the notebook.
# Forward slashes are used because they resolve on Windows, macOS and Linux,
# whereas a backslash-separated path only works on Windows.
df = pd.read_csv('../datasets/vgsales.csv')
In [4]:
# Defines regions (display name -> sales column in df)
regions = {
    'North America': 'NA_Sales',
    'Europe': 'EU_Sales',
    'Japan': 'JP_Sales',
    'Other Regions': 'Other_Sales'
}

# Group by Platform and sum the sales of each regional column
platform_sales = df.groupby('Platform')[list(regions.values())].sum()

# Creates a plot (Plotly) with one bar trace per region, in the order of `regions`
fig = go.Figure()

for region, column in regions.items():
    fig.add_trace(go.Bar(
        x=platform_sales.index,
        y=platform_sales[column],
        name=region
    ))

# Drop-down menu.
# The first entry shows every trace, which is the state the figure starts in;
# without it the menu would highlight a single region while all four are drawn,
# and there would be no way back to the stacked view after picking a region.
platform_buttons = [
    {
        'label': 'All Regions',
        'method': 'update',
        'args': [{'visible': [True] * len(regions)}]
    }
]
# One entry per region: only the trace whose column matches stays visible.
platform_buttons += [
    {
        'label': region,
        'method': 'update',
        'args': [{'visible': [column == r for r in regions.values()]}]
    }
    for region, column in regions.items()
]

fig.update_layout(
    updatemenus=[
        {
            'buttons': platform_buttons,
            'direction': 'down',
            'showactive': True,
        }
    ],
    # NOTE(review): year range aligned with the other charts in this notebook
    # (1980-2020); it previously read 1980-2002 - confirm against df['Year'].
    title='Video Game Sales by Platform and Region (1980-2020)',
    xaxis_title='Platform',
    yaxis_title='Sales (millions)',
    barmode='stack',
)

fig.show()
In [5]:
# Define regions (display name -> sales column in df)
regions = {
    'North America': 'NA_Sales',
    'Europe': 'EU_Sales',
    'Japan': 'JP_Sales',
    'Other Regions': 'Other_Sales'
}

# Group by Genre and sum the sales of each regional column
genre_sales = df.groupby('Genre')[list(regions.values())].sum()

# Creates a plot (Plotly) with one bar trace per region, in the order of `regions`
fig = go.Figure()

for region, column in regions.items():
    fig.add_trace(go.Bar(
        x=genre_sales.index,
        y=genre_sales[column],
        name=region
    ))

# Drop-down menu.
# The first entry shows every trace, which is the state the figure starts in;
# without it the menu would highlight a single region while all four are drawn,
# and there would be no way back to the stacked view after picking a region.
genre_buttons = [
    {
        'label': 'All Regions',
        'method': 'update',
        'args': [{'visible': [True] * len(regions)}]
    }
]
# One entry per region: only the trace whose column matches stays visible.
genre_buttons += [
    {
        'label': region,
        'method': 'update',
        'args': [{'visible': [column == r for r in regions.values()]}]
    }
    for region, column in regions.items()
]

fig.update_layout(
    updatemenus=[
        {
            'buttons': genre_buttons,
            'direction': 'down',
            'showactive': True,
        }
    ],
    title='Video Game Sales by Genre and Region (1980-2020)',
    xaxis_title='Genre',
    yaxis_title='Sales (millions)',
    barmode='stack',
)

fig.show()
In [6]:
# Sum global sales per (Year, Genre), then pivot genres into columns.
# The column is selected before aggregating so only Global_Sales is summed
# instead of every column of df; year/genre pairs with no rows become 0.
year_genre_sales = (
    df.groupby(['Year', 'Genre'])['Global_Sales']
    .sum()
    .unstack()
    .fillna(0)
)

# Plot (pandas area plot drawn on an explicit matplotlib Axes).
# Passing ax= is required for the figsize to take effect: DataFrame.plot
# otherwise opens its own figure and leaves the 12x10 one empty.
area_fig, area_ax = plt.subplots(figsize=(12, 10))
year_genre_sales.plot(kind='area', stacked=True, colormap='tab20c', ax=area_ax)

# Customizing the plot
area_ax.set_title('Comparison of Global Sales by Genre Over the Years', fontsize=12, fontweight='bold')
area_ax.set_xlabel('Year', fontsize=10)
area_ax.set_ylabel('Global Sales (millions)', fontsize=10)
area_ax.grid(axis='y', linestyle='-', linewidth=0.7)  # Only horizontal gridlines
area_ax.legend(loc='upper left', bbox_to_anchor=(0, 1))

area_fig.tight_layout()

plt.show()
<Figure size 1200x1000 with 0 Axes>
No description has been provided for this image
In [7]:
# Sales regions: display name -> column holding that region's sales in df
regions = {
    'North America': 'NA_Sales',
    'Europe': 'EU_Sales',
    'Japan': 'JP_Sales',
    'Other Regions': 'Other_Sales'
}

# For every region, keep the 10 rows of df with the largest value in that
# region's sales column (rank, name, platform and the sales figure itself).
top_10_games = {
    region: df.nlargest(10, column)[['Rank', 'Name', 'Platform', column]]
    for region, column in regions.items()
}

# Visualization: a 2x2 grid with one horizontal bar chart per region
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

for ax, (region, column) in zip(axes.flatten(), regions.items()):
    top_games = top_10_games[region]
    # Label each bar as "Name (Platform)"
    game_labels = top_games['Name'] + ' (' + top_games['Platform'] + ')'
    bars = ax.barh(game_labels, top_games[column], color='skyblue')
    ax.set_title(f"Top 10 Games in {region} (1980-2020)")
    ax.set_xlabel('Sales (millions)')
    # Dashed vertical gridlines only; the empty linestyle draws nothing for y
    ax.grid(axis='x', linestyle='--')
    ax.grid(axis='y', linestyle='')
    # Flip the axis so the first (largest) entry is drawn at the top
    ax.invert_yaxis()

plt.tight_layout()
plt.show()
No description has been provided for this image
In [8]:
# Sales regions: display name -> column holding that region's sales in df
regions = {
    'North America': 'NA_Sales',
    'Europe': 'EU_Sales',
    'Japan': 'JP_Sales',
    'Other Regions': 'Other_Sales'
}

# For every region, total the sales per genre and keep the (up to) 10 genres
# with the largest totals; each value is a Series indexed by genre.
top_10_genres = {
    region: df.groupby('Genre')[column].sum().nlargest(10)
    for region, column in regions.items()
}

# Visualization: a 2x2 grid with one horizontal bar chart per region
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

for ax, (region, column) in zip(axes.flatten(), regions.items()):
    top_genres = top_10_genres[region]
    bars = ax.barh(top_genres.index, top_genres.values, color='skyblue')
    ax.set_title(f"Top 10 Genres in {region} (1980-2020)")
    ax.set_xlabel('Sales (millions)')
    # Dashed vertical gridlines only; the empty linestyle draws nothing for y
    ax.grid(axis='x', linestyle='--')
    ax.grid(axis='y', linestyle='')
    # Flip the axis so the first (largest) entry is drawn at the top
    ax.invert_yaxis()

plt.tight_layout()
plt.show()
No description has been provided for this image